home *** CD-ROM | disk | FTP | other *** search
- ;-----------------------------------------------------------------------------
- ;S404 highly optimized data_decruncher_lib 1.0_turbo for use in stc.library
- ;23.11.93 by Marcus 'Cozine' Ottosson
- ;
- ; This is a highly optimized library decruncher. The code does not fit into
- ; the instruction cache (286 bytes). Generally, it's NOT possible to use all
- ; the 256 bytes available in the instruction cache. The caches in the 68020
- ; and the 68030 consist of 16 rows, each of which contains 16 bytes.
- ; AllocMem() and AllocPooled(), the memory allocation routines used for
- ; loading executables, guarantee 8-byte alignment.
- ; On the 68000 my large testfile decrunches 3.7% faster in this version than
- ; the standard version. Some files may decrunch much slower on the 68020 and
- ; the 68030. I don't know the maximum loss, probably about 10-20%. The loss
- ; will not be very high when we are multitasking, since the interrupts cause a
- ; partial or complete cache flush anyway. I want some test reports!!!
- ;
- ;-----------------------------------------------------------------------------
- ;S404 data_decruncher v0.2
- ;(c) 1993 by Jouni 'Mr.Spiv' Korhonen (SWSW)
- ;-----------------------------------------------------------------------------
- ;call with registers: a2 = destination address
- ; a1 = crunched data
- ;-----------------------------------------------------------------------------
-
- ;Uses d0-d7/a0-a6
-
-
-
-
- decrunch: ; Entry: a2 = destination, a1 = crunched data. Output is written backwards, from the end of the destination down to its start
- l0: addq #8,a1 ; skip the first 8 bytes of the crunched data (presumably an ID/header - confirm against the cruncher)
- edDCopyableStart: ; not referenced in this listing - presumably a marker/alternate entry for the library, confirm
- move.l a2,a5 ; a5 = start of destination; decoding stops once a2 has come back down to it
- add.l (a1)+,a2 ; a2 += longword from the data (presumably the decrunched length) -> a2 = write pointer, used as -(a2)
- add.l (a1),a1 ; a1 += longword it points at -> a1 = read pointer; the stream is consumed backwards with -(a1)
- edDCopyableStart2: ; not referenced in this listing - see note on edDCopyableStart
- moveq #0,d4 ; d4 = 0, turned into a low-word mask by the not below
- moveq #16,d5 ; d5 = 16 = bits per refill word (added to d7 in lbits)
- movem (a1),d2/d6/d7 ; word movem: three consecutive words -> d2 (field width for long offsets, see loff3), d6 (bit buffer), d7 (bit counter)
- not d4 ; word-sized not: d4 = $0000ffff, used in lbits to clear the upper word of d6
- lea loff6(pc),a3 ; a3 = continuation for lbits when an offset field is read
- lea llen5a(pc),a4 ; a4 = continuation for lbits when a length field is read
- moveq #1,d0 ; d0 = constant 1 for the addx at llen7
- moveq #-1,d3 ; d3 = copy counter; every copy/literal run ends with an expired dbf d3, so d3.w is -1 again at lmain
- bra.s ltest1 ; enter the loop at the end-of-output test
-
- cnop 0,8 ; Use if main loop>=244 bytes
-
- ;*** Here's the start of the instruction cache
-
- lins: subq #8,d7 ; --- control bit 0: one literal byte. d7 = bits left in d6 minus the 8 needed
- bpl.s lins2 ; all 8 bits are in the buffer -> fast path
- lins1: move d7,d1 ; slow path: d1 = -(number of bits missing)
- addq #8,d7 ; d7 = bits still left in the buffer
- lsl.l d7,d6 ; push those bits into the upper word of d6
- move -(a1),d6 ; refill the low word with the next stream word
- neg d1 ; d1 = number of bits missing
- lsl.l d1,d6 ; shift them up as well: 8 bits shifted in total, literal now sits in bits 16-23
- addq #8,d7 ; d7 = bits left in the fresh word (16 - missing)
- swap d6 ; bring the literal into the low byte
- move.b d6,-(a2) ; store the literal
- swap d6 ; put the bit buffer back in the low word
- cmp.l a2,a5 ; has the write pointer reached the start of the destination?
- dbhs d7,lmain ; not yet: count one bit and go on at lmain, unless the counter expires (buffer empty)
- bra.s lexma ; finished or buffer empty: dbhs/bra keep the cmp flags, lexma decides
-
- lins2: rol #8,d6 ; rotate the top 8 bits of the buffer word into the low byte
- move.b d6,-(a2) ; store the literal
- ltest1: cmp.l a2,a5 ; has the write pointer reached the start of the destination?
- dbhs d7,lmain ; not yet: count one bit and go on at lmain, unless the counter expires (buffer empty)
- lexma bhs.s lexit ; a5 >= a2: all output written; otherwise fall through and refill
-
- lmain1: move -(a1),d6 ; refill: fetch the next word of the stream
- moveq #15,d7 ; 16 fresh bits, 15 left once lmain has taken the control bit
- lmain: add d6,d6 ; shift the next control bit into carry
- bcc.s lins ; 0 = literal byte
-
- dbf d7,llen1 ; 1 = match: count the bit (refill if the buffer is empty), then decode the length
- move -(a1),d6 ; refill
- moveq #15,d7
- llen1: add d6,d6 ; first length-prefix bit
- bcs.s llen6 ; 1 -> shortest matches, one more bit selects the length
- moveq #2,d1 ; preset for the next case: 2-bit length field...
- moveq #4-2,d3 ; ...on top of a base counter of 2 (the copy moves d3+2 bytes)
- dbf d7,llen2
- move -(a1),d6 ; refill
- moveq #15,d7
- llen2: add d6,d6 ; second length-prefix bit
- bcs.s llen5 ; 1 -> read the 2-bit field with the presets above
- dbf d7,llen3
- move -(a1),d6 ; refill
- moveq #15,d7
- llen3: add d6,d6 ; third length-prefix bit
- bcc.s llen4 ; 0 -> long length built from 8-bit fields
- moveq #4,d1 ; 1 -> 4-bit length field...
- moveq #8-2,d3 ; ...on top of a base counter of 6
- lea llen3a(pc),a6 ; lbits continues at llen3a
- bra.s lbits
- llen3a: add d1,d3 ; d3 = 6 + field
- cmp #15,d1 ; field value 15 is an escape code
- blo.s loff1 ; below 15: ordinary match, go decode the offset
-
- moveq #5,d1 ; escape: a run of literal bytes follows, count in a 5-bit field
- moveq #14-1,d3 ; base counter 13 (the dbf at l2ins runs d3+1 times)
- lea llen3b(pc),a6 ; lbits continues at llen3b
- bra.s lbits
-
- llen4: moveq #23-2,d3 ; base counter 21 for the long-length case
- lloop: moveq #8,d1 ; read an 8-bit length field
- llen5: move.l a4,a6 ; lbits continues at llen5a
- bra.s lbits
- llen5a: add d1,d3 ; add the field to the copy counter
- not.b d1 ; result is zero only when the low byte of the field was $ff
- dbeq d7,loff2 ; not $ff: length complete, count one bit and decode the offset
- bne.s loff2a ; ...counter expired (buffer empty): refill first; dbeq left the flags untouched
- bra.s lloop ; field was $ff: another 8-bit field follows and is added as well
-
- loff6: add d1,a0 ; a0 = match source = base + offset field (word-sized add to an address register)
- move.b (a0),-(a2) ; first byte of the match
- lcopy: move.b -(a0),-(a2) ; remaining bytes, both pointers moving downwards
- dbf d3,lcopy ; d3+1 iterations; leaves d3.w = -1 for the next token
- ltest: cmp.l a2,a5 ; has the write pointer reached the start of the destination?
- dbhs d7,lmain ; not yet: count one bit and go on at lmain, unless the counter expires
- blo.s lmain1 ; not finished but buffer empty -> refill
- lexit: rts ; a5 >= a2: done
-
- llen6: dbf d7,llen7 ; shortest matches: fetch one selector bit (refill if needed)
- move -(a1),d6 ; refill
- moveq #15,d7
- llen7: add d6,d6 ; selector bit -> X flag
- addx d0,d3 ; d3.w = -1 + 1 + X = 0 or 1, i.e. a 2- or 3-byte copy
- loff1: dbf d7,loff2 ; --- offset decoding; refill first if the buffer is empty
- loff2a: move -(a1),d6 ; refill
- moveq #15,d7
- loff2: add d6,d6 ; first offset-class bit
- bcs.s loff3 ; 1 -> long offset
-
- dbf d7,loff4
- move -(a1),d6 ; refill
- moveq #15,d7
- loff4: moveq #9,d1 ; assume a 9-bit offset field...
- lea 32(a2),a0 ; ...relative to a2+32
- add d6,d6 ; second offset-class bit
- bcc.s loff5 ; 0 -> keep the 9-bit form
- moveq #5,d1 ; 1 -> 5-bit offset field...
- move.l a2,a0 ; ...relative to a2 itself
- bra.s loff5
- loff3: lea 544(a2),a0 ; long offset: base a2+544 (32 + 512, just past the 9-bit range)
- move d2,d1 ; field width = the word loaded into d2 during setup
- loff5: move.l a3,a6 ; lbits continues at loff6
-
- lbits: and.l d4,d6 ; --- read d1 bits from the stream into d1.w, then jump to (a6). First clear the upper word of d6
- sub d1,d7 ; d7 = bits left after this read
- bpl.s lbits2 ; enough bits buffered: single shift
- add d7,d1 ; d1 = bits currently available
- lsl.l d1,d6 ; move them into the upper word
- move d7,d1 ; d1 = -(bits missing)
- move -(a1),d6 ; refill the low word
- neg d1 ; d1 = bits missing
- add d5,d7 ; d7 = 16 - missing = bits left in the fresh word
- lbits2: lsl.l d1,d6 ; shift the (rest of the) field into the upper word
- move.l d6,d1 ; copy buffer + field
- swap d1 ; d1.w = field value (upper word of d1 holds the leftover buffer and is not used)
- jmp (a6) ; go to the continuation chosen by the caller
-
- ; This part is not executed very often. Some files may decrunch much slower
- ; on the 68020/68030.
-
- llen3b: add d1,d3 ; d3 = 13 + 5-bit field; d3+1 literal bytes follow in the stream
- l2ins: subq #8,d7 ; per byte: same 8-bit fetch as lins, d7 = bits left minus 8
- bmi.s l2ins1 ; not enough bits buffered -> slow path
- rol #8,d6 ; rotate the top 8 bits into the low byte
- move.b d6,-(a2) ; store the literal
- dbf d3,l2ins ; next literal; leaves d3.w = -1 when the run is finished
- bra.s ltest ; back to the end-of-output test
-
- l2ins1: move d7,d1 ; slow path, same sequence as lins1: d1 = -(bits missing)
- addq #8,d7 ; d7 = bits still left in the buffer
- lsl.l d7,d6 ; push them into the upper word
- move -(a1),d6 ; refill the low word
- neg d1 ; d1 = bits missing
- lsl.l d1,d6 ; literal now in bits 16-23
- addq #8,d7 ; d7 = bits left in the fresh word
- swap d6 ; literal into the low byte
- move.b d6,-(a2) ; store the literal
- swap d6 ; restore the bit buffer
- dbf d3,l2ins ; next literal; leaves d3.w = -1 when the run is finished
- bra ltest ; back to the end-of-output test
-
-